library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(gtsummary)
library(survival)
library(survminer)
## Loading required package: ggpubr
library(sjPlot)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(lemon)
## 
## Attaching package: 'lemon'
## The following object is masked from 'package:purrr':
## 
##     %||%
## The following objects are masked from 'package:ggplot2':
## 
##     CoordCartesian, element_render
library(knitr)
rm(list=ls())
os <- sessionInfo()$running
if (str_detect(os, "Ubuntu")) {
  path <- '~/Biostatistics_M215_Project/data/'
} else if (str_detect(os, "mac")) {
  path <- '~/Downloads/Biostat 215 Project/Biostatistics_M215_Project/Data/'
} else if(str_detect(os, "Windows")){
  path <- "C:/Users/alexc/Desktop/215/Biostatistics_M215_Project/data/"
}

Data clean and recode

endpoint = read_excel(paste0(path, 'endpoints.xls'))
endpoint_colnames = c('ID', 'Intervention Group', 
                      'Vitality Status as of 6/1/2006', 
                      'Breast Cancer Status as of 6/1/2006 or last prior contact',
                      'Other Cancer (invasive, not breast) Status as of 6/1/2006',
                      'Breast Cancer Contribute to Death',
                      'Year Breast Cancer Diagnosed', 'Cancer Grade',
                      'Dummy Variable for Cancer Grade 2', 
                      'Dummy Variable for Cancer Grade 3',
                      'Dummy Variable for Unknown Cancer Grade',
                      'Cancer Stage, AJCC 6th', 
                      'Dummy Variable for Stage 2 AJCC 6th',
                      'Dummy Variable for Stage 3 AJCC 6th',
                      'WHEL Clinical Site', 'Recurrence Flag',
                      'Years from Diagnosis to WHEL Study Entry',
                      'Years from Study Entry to Recurrence/New Primary, or to Censor',
                      'Years from Diagnosis to Recurrence/New Primary, or to Censor',
                      'Years from Diagnosis to Death or Censor')
colnames(endpoint) = endpoint_colnames
endpoint$`Intervention Group` = ifelse(endpoint$`Intervention Group` == 3, 'Intervention', 'Comparison')
endpoint$`Vitality Status as of 6/1/2006` = ifelse(endpoint$`Vitality Status as of 6/1/2006` == 0, 'Dead', ifelse(endpoint$`Vitality Status as of 6/1/2006` == 1, 'Alive', 'Unknown'))
for (i in 1:nrow(endpoint)){
if (endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 0){
  endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'No Evidence of Recurrence'
} else if(endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 1){
  endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – New Primary Breast Cancer'
}else if(endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 2){
  endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – Local Recurrence'
}else if(endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] == 3){
  endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – Regional Recurrence'
}else{
  endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`[i] = 'Confirmed – Distant Recurrence '}
}
endpoint$`Other Cancer (invasive, not breast) Status as of 6/1/2006` = ifelse(endpoint$`Other Cancer (invasive, not breast) Status as of 6/1/2006` == 0, 'No evidence of Disease', ifelse(endpoint$`Other Cancer (invasive, not breast) Status as of 6/1/2006` == 1, 'Reported Other Cancer (not confirmed)', 'Confirmed Other Cancer'))
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == -1] = 'Not Dead'
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == 0] = 'Dead from a cause other than Breast Cancer'
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == 1] = 'Dead from Breast Cancer'
endpoint$`Breast Cancer Contribute to Death`[endpoint$`Breast Cancer Contribute to Death` == 2] = 'Dead from Cancer, not confirmed breast but likely so'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 0] = 'Grade Not Applicable or Not Available'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 1] = 'Grade I, Well Differentiated'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 2] = 'Grade II, Moderately Differentiated'
endpoint$`Cancer Grade`[endpoint$`Cancer Grade` == 3] = 'Grade III, Poorly Differentiated'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 1] = 'Stage I'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 2] = 'Stage IIA'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 3] = 'Stage IIB'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 4] = 'Stage IIIA'
endpoint$`Cancer Stage, AJCC 6th`[endpoint$`Cancer Stage, AJCC 6th` == 5] = 'Stage IIIB'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 1] = 'Site A in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 3] = 'Site B in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 5] = 'Site C in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 7] = 'Site in Arizona'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 9] = 'Site D in California'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 11] = 'Site in Texas'
endpoint$`WHEL Clinical Site`[endpoint$`WHEL Clinical Site` == 13] = 'Site in Oregon'
endpoint$`Recurrence Flag`[endpoint$`Recurrence Flag` == 0] = 'No Invasive Breast Cancer Events'
endpoint$`Recurrence Flag`[endpoint$`Recurrence Flag` == 1] = 'Invasive Breast Cancer Event'

Baseline Characteristics of WHEL Study Participants by Study Group

endpoint %>%
  tbl_summary(by = 'Intervention Group',
              include=-c(ID, `Year Breast Cancer Diagnosed`,
                         `Dummy Variable for Cancer Grade 2`,
                         `Dummy Variable for Cancer Grade 3`,
                         `Dummy Variable for Unknown Cancer Grade`,
                         `Dummy Variable for Stage 2 AJCC 6th`,
                         `Dummy Variable for Stage 3 AJCC 6th`),
              missing_text = "Missing") %>%
  add_p()
Characteristic Comparison, N = 1,5511 Intervention, N = 1,5371 p-value2
Vitality Status as of 6/1/2006 0.8
Alive 1,391 (90%) 1,381 (90%)
Dead 160 (10%) 155 (10%)
Unknown 0 (0%) 1 (<0.1%)
Breast Cancer Status as of 6/1/2006 or last prior contact 0.6
Confirmed – Distant Recurrence 189 (12%) 168 (11%)
Confirmed – Local Recurrence 28 (1.8%) 35 (2.3%)
Confirmed – New Primary Breast Cancer 35 (2.3%) 43 (2.8%)
Confirmed – Regional Recurrence 10 (0.6%) 10 (0.7%)
No Evidence of Recurrence 1,289 (83%) 1,281 (83%)
Other Cancer (invasive, not breast) Status as of 6/1/2006 0.5
Confirmed Other Cancer 60 (3.9%) 58 (3.8%)
No evidence of Disease 1,483 (96%) 1,472 (96%)
Reported Other Cancer (not confirmed) 4 (0.3%) 1 (<0.1%)
Missing 4 6
Breast Cancer Contribute to Death 0.8
Dead from a cause other than Breast Cancer 25 (1.6%) 28 (1.8%)
Dead from Breast Cancer 135 (8.7%) 126 (8.2%)
Dead from Cancer, not confirmed breast but likely so 0 (0%) 1 (<0.1%)
Not Dead 1,391 (90%) 1,382 (90%)
Cancer Grade >0.9
Grade I, Well Differentiated 245 (16%) 239 (16%)
Grade II, Moderately Differentiated 620 (40%) 620 (40%)
Grade III, Poorly Differentiated 557 (36%) 551 (36%)
Grade Not Applicable or Not Available 129 (8.3%) 127 (8.3%)
Cancer Stage, AJCC 6th >0.9
Stage I 607 (39%) 584 (38%)
Stage IIA 511 (33%) 515 (34%)
Stage IIB 190 (12%) 194 (13%)
Stage IIIA 185 (12%) 188 (12%)
Stage IIIB 58 (3.7%) 56 (3.6%)
WHEL Clinical Site >0.9
Site A in California 258 (17%) 272 (18%)
Site B in California 226 (15%) 210 (14%)
Site C in California 267 (17%) 257 (17%)
Site D in California 260 (17%) 256 (17%)
Site in Arizona 242 (16%) 233 (15%)
Site in Oregon 117 (7.5%) 116 (7.5%)
Site in Texas 181 (12%) 193 (13%)
Recurrence Flag 0.9
Invasive Breast Cancer Event 262 (17%) 256 (17%)
No Invasive Breast Cancer Events 1,289 (83%) 1,281 (83%)
Years from Diagnosis to WHEL Study Entry 1.76 (1.05, 2.81) 1.84 (1.04, 2.80) 0.8
Years from Study Entry to Recurrence/New Primary, or to Censor 7.12 (5.87, 8.44) 7.11 (5.83, 8.53) >0.9
Years from Diagnosis to Recurrence/New Primary, or to Censor 8.97 (7.36, 10.64) 9.00 (7.35, 10.67) 0.8
Years from Diagnosis to Death or Censor 9.18 (7.76, 10.87) 9.28 (7.82, 10.89) 0.6

1 n (%); Median (IQR)

2 Fisher's exact test; Pearson's Chi-squared test; Wilcoxon rank sum test

(Table 5) Intervention Effects on All-Cause Mortality by Baseline Demographic and Clinical Characteristics

1 Preprocessing

demo = read_excel("../Data/demographics.xls")
phbase = read_excel("../Data/phbase.xls")
nds = read_excel("../Data/ndsfoody4.xls")
medical = read_excel("../Data/Medical.xls")

# We need the following variables:

## Survival time
SurvTime = as.numeric(endpoint$`Years from Diagnosis to Death or Censor`)
## Group and Status
Group = as_factor(endpoint$`Intervention Group`)
Group = relevel(Group, ref = "Comparison")
a = as_factor(endpoint$`Vitality Status as of 6/1/2006`)
Status = ifelse(a == "Alive", 0, 1)
## Age at randomization, y
AgeIdx = ifelse(demo$`age at rand` < 55, "<55", ">=55") # Age indicator (<=55 or not)
## Cancer stage at diagnosis
a = endpoint$`Cancer Stage, AJCC 6th`
CancerStage = as_factor(a)
## Hormone receptor status
a = medical$`Estr Recep`
b = medical$`Prog Recep`
HormoneRecep = ifelse(a==1 & b==1, "ER+/PR+",
                      ifelse(a==1 & b==0, "ER+/PR-",
                             ifelse(a==0 & b==1, "ER-/PR+",
                                    ifelse(a==0 & b==0, "ER-/PR-", NA))))
## Time from Diag to Rand
a = endpoint$`Years from Diagnosis to WHEL Study Entry`
#TimeDiagRand = as.numeric(ifelse(a <=1, 0,
#                        ifelse(a <=2, 1,
#                               ifelse(a <=3, 2, 3
#                        )))) # Time from diagnosis to randomization
TimeDiagRand = a
## Tumor differentiation
a = endpoint$`Cancer Grade`
TumorDiff = as_factor(a)
## No. of positive nodes (Number axillary lymph nodes positive)
a = medical$`Node Pos`
PosNodes = as_factor(ifelse(a==0, "0", 
                  ifelse(a <= 3, "1~3",
                         ifelse(a <= 6, "4~6", ">6"))))
PosNodes = relevel(PosNodes, ref = "0")
## Tumor size
a = medical$`Tumor Size` 
TumorSize = as_factor(ifelse(a < 2, "0~2", 
                   ifelse(a < 3, "2~3",
                          ifelse(a < 4, "3~4",
                                 ifelse(a < 5, "4~5", ">5")))))
TumorSize = relevel(TumorSize, ref="0~2")
## Physical activity 
a = phbase$`NEW METS` 
PhysicalAct = ifelse(a <= 210, "<210", 
                      ifelse(a <= 615, "211~615",
                             ifelse(a <= 1290, "616~1290", ">1290"))) 
## Energy intake
b = matrix(NA, nrow = length(a), ncol=1)
colnames(b) = "KCal"
b[endpoint$ID %in% nds$ID] = nds$Kcal
KCal = as_factor(ifelse(b <= 1430, "<1430",
              ifelse(b <= 1680, "1430~1680",
                     ifelse(b <= 1980, "1681~1980", 
                            ifelse(b > 1980, ">1980", NA)))))
KCal = relevel(KCal, ref="<1430")

##### PUT THEM TOGETHER #####
AllCauseMortalityData = data.frame(
  SurvTime, Group, Status, AgeIdx, CancerStage, HormoneRecep, TimeDiagRand,
        TumorDiff, PosNodes, TumorSize, PhysicalAct, KCal
)
library(DT)
AllCauseMortalityData %>%
  mutate(SurvTime = round(SurvTime, 3),
         Status = ifelse(Status==0, "Alive", "Dead")) %>%
  datatable(., extensions = 'Scroller', options = list(
    deferRender = TRUE,
    scrollY = 200,
    scroller = TRUE
))

2 Imputation

library(miceRanger)
ImputedAll <- AllCauseMortalityData %>%
  miceRanger(
  m = 3, 
  returnModels = TRUE,
  verbose = TRUE
)
## 
## Process started at 2021-11-20 12:18:00 
## 
## dataset 1 
## iteration 1   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 2   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 3   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 4   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 5   | HormoneRecep | PosNodes | TumorSize | KCal
## 
## dataset 2 
## iteration 1   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 2   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 3   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 4   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 5   | HormoneRecep | PosNodes | TumorSize | KCal
## 
## dataset 3 
## iteration 1   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 2   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 3   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 4   | HormoneRecep | PosNodes | TumorSize | KCal
## iteration 5   | HormoneRecep | PosNodes | TumorSize | KCal
ImputedData = as.data.frame(completeData(ImputedAll)[[3]])

Quality Check

#plotModelError(ImputedAll, vars = 'allNumeric')
#plotDistributions(ImputedAll, vars = 'allNumeric')
plotVarImportance(ImputedAll)

3 Universal and stratified Cox regression

library(sjPlot)
library(sjmisc)
library(sjlabelled)

# Standard table
UniversalMod = coxph(Surv(SurvTime, Status) ~ ., data=ImputedData)
tab_model(UniversalMod)
  Surv(SurvTime, Status)
Predictors Estimates CI p
Group [Intervention] 0.97 0.78 – 1.21 0.803
AgeIdx [>=55] 1.64 1.30 – 2.07 <0.001
CancerStage [Stage I] 0.81 0.52 – 1.26 0.349
CancerStage [Stage IIB] 1.24 0.78 – 1.96 0.357
CancerStage [Stage IIIA] 0.77 0.28 – 2.14 0.618
CancerStage [Stage IIIB] 0.93 0.29 – 2.97 0.896
HormoneRecep [ER-/PR+] 1.14 0.68 – 1.90 0.628
HormoneRecep [ER+/PR-] 0.93 0.64 – 1.35 0.707
HormoneRecep [ER+/PR+] 0.74 0.55 – 0.98 0.036
TimeDiagRand 0.63 0.56 – 0.72 <0.001
TumorDiff [Grade II,
Moderately
Differentiated]
1.21 0.77 – 1.88 0.405
TumorDiff [Grade I, Well
Differentiated]
0.57 0.31 – 1.04 0.067
TumorDiff [Grade III,
Poorly Differentiated]
1.33 0.85 – 2.08 0.212
PosNodes [1~3] 1.15 0.73 – 1.79 0.546
PosNodes [4~6] 2.67 0.90 – 7.92 0.077
PosNodes [>6] 4.42 1.46 – 13.41 0.009
TumorSize [2~3] 1.65 1.16 – 2.36 0.006
TumorSize [3~4] 1.42 0.90 – 2.25 0.133
TumorSize [4~5] 1.46 0.86 – 2.49 0.164
TumorSize [>5] 1.78 1.06 – 2.98 0.029
PhysicalAct [>1290] 0.66 0.47 – 0.92 0.015
PhysicalAct [211~615] 1.04 0.78 – 1.38 0.812
PhysicalAct [616~1290] 0.85 0.63 – 1.16 0.313
KCal [1430~1680] 0.79 0.58 – 1.07 0.124
KCal [>1980] 1.39 1.02 – 1.89 0.036
KCal [1681~1980] 0.95 0.70 – 1.28 0.728
Observations 3088
R2 Nagelkerke 0.097

Model diagnosis for the universal model

# Model Diagnosis
## Martingale Residual
MartResid = residuals(UniversalMod, type="martingale")
ggplot(data = ImputedData, mapping = aes(x = CancerStage, y = MartResid)) +
    geom_violin() +
    geom_point() +
    labs(title = "Martingale Residual Plot") +
    theme_bw() + theme(legend.key = element_blank())

ggplot(data = ImputedData, mapping = aes(x = KCal, y = MartResid)) +
    geom_violin() +
    geom_point() +
    labs(title = "Martingale Residual Plot") +
    theme_bw() + theme(legend.key = element_blank())

ggplot(data = ImputedData, mapping = aes(x = TimeDiagRand, y = MartResid)) +
    geom_point() +
    geom_smooth() +
    labs(title = "Time from Diagnosis to Randomization") +
    theme_bw() + theme(legend.key = element_blank())

# Cox-Snell Residual
Event = ImputedData$Status
CSResid = -(MartResid - Event)
fit_coxsnell <- coxph(formula = Surv(CSResid, Event) ~ 1,
                      ties    = c("efron","breslow","exact")[1])

## Nelson-Aalen estimator for baseline hazard (all covariates zero)
df_base_haz <- basehaz(fit_coxsnell, centered = FALSE)
## Plot
ggplot(data = df_base_haz, mapping = aes(x = time, y = hazard)) +
    geom_point() +
    labs(x = "Cox-Snell residuals as pseudo observed times",
         y = "Estimated cumulative hazard at pseudo observed times") +
    theme_bw() + theme(legend.key = element_blank()) +
    geom_abline(intercept = 0, color="blue", cex=1, alpha=0.5)

We have already fitted a universal model, now we are going to fit models for both groups.

library(survival)
library(finalfit)

dependent_os <- "Surv(SurvTime, Status)"
explanatory <- c("AgeIdx", "CancerStage", "HormoneRecep", "TimeDiagRand",
        "TumorDiff", "PosNodes", "TumorSize", "PhysicalAct", "KCal")
ImputedData %>%
  summary_factorlist(dependent_os, 
  explanatory, fit_id=TRUE) -> TabStart

TabStart
##         label                                levels         all
##        AgeIdx                                   <55 1825 (59.1)
##                                                >=55 1263 (40.9)
##   CancerStage                             Stage IIA 1026 (33.2)
##                                             Stage I 1191 (38.6)
##                                           Stage IIB  384 (12.4)
##                                          Stage IIIA  373 (12.1)
##                                          Stage IIIB   114 (3.7)
##  HormoneRecep                               ER-/PR-  632 (20.5)
##                                             ER-/PR+   133 (4.3)
##                                             ER+/PR-  374 (12.1)
##                                             ER+/PR+ 1949 (63.1)
##  TimeDiagRand                             Mean (SD)   2.0 (1.0)
##     TumorDiff Grade Not Applicable or Not Available   256 (8.3)
##                 Grade II, Moderately Differentiated 1240 (40.2)
##                        Grade I, Well Differentiated  484 (15.7)
##                    Grade III, Poorly Differentiated 1108 (35.9)
##      PosNodes                                     0 1775 (57.5)
##                                                 1~3  885 (28.7)
##                                                 4~6   231 (7.5)
##                                                  >6   197 (6.4)
##     TumorSize                                   0~2 1523 (49.3)
##                                                 2~3  863 (27.9)
##                                                 3~4  335 (10.8)
##                                                 4~5   151 (4.9)
##                                                  >5   216 (7.0)
##   PhysicalAct                                  <210  850 (27.5)
##                                               >1290  738 (23.9)
##                                             211~615  751 (24.3)
##                                            616~1290  749 (24.3)
##          KCal                                 <1430 1186 (38.4)
##                                           1430~1680  776 (25.1)
##                                               >1980  453 (14.7)
##                                           1681~1980  673 (21.8)
##                                          fit_id index
##                                       AgeIdx<55     1
##                                      AgeIdx>=55     2
##                            CancerStageStage IIA     3
##                              CancerStageStage I     4
##                            CancerStageStage IIB     5
##                           CancerStageStage IIIA     6
##                           CancerStageStage IIIB     7
##                             HormoneRecepER-/PR-     8
##                             HormoneRecepER-/PR+     9
##                             HormoneRecepER+/PR-    10
##                             HormoneRecepER+/PR+    11
##                                    TimeDiagRand    12
##  TumorDiffGrade Not Applicable or Not Available    13
##    TumorDiffGrade II, Moderately Differentiated    14
##           TumorDiffGrade I, Well Differentiated    15
##       TumorDiffGrade III, Poorly Differentiated    16
##                                       PosNodes0    17
##                                     PosNodes1~3    18
##                                     PosNodes4~6    19
##                                      PosNodes>6    20
##                                    TumorSize0~2    21
##                                    TumorSize2~3    22
##                                    TumorSize3~4    23
##                                    TumorSize4~5    24
##                                     TumorSize>5    25
##                                 PhysicalAct<210    26
##                                PhysicalAct>1290    27
##                              PhysicalAct211~615    28
##                             PhysicalAct616~1290    29
##                                       KCal<1430    30
##                                   KCal1430~1680    31
##                                       KCal>1980    32
##                                   KCal1681~1980    33
# Cox model for the intervention group
ImputedData %>%
  filter(Group=="Intervention") %>%
  finalfit(dependent_os, explanatory) -> TabIntervention
knitr::kable(TabIntervention, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))
Dependent: Surv(SurvTime, Status) all HR (univariable) HR (multivariable)
AgeIdx <55 908 (100.0) - -
>=55 629 (100.0) 1.16 (0.85-1.59, p=0.348) 1.65 (1.19-2.30, p=0.003)
CancerStage Stage IIA 515 (100.0) - -
Stage I 584 (100.0) 0.69 (0.43-1.09, p=0.110) 1.27 (0.67-2.39, p=0.467)
Stage IIB 194 (100.0) 2.13 (1.34-3.39, p=0.001) 1.18 (0.62-2.24, p=0.608)
Stage IIIA 188 (100.0) 2.51 (1.60-3.93, p<0.001) 0.29 (0.04-2.35, p=0.245)
Stage IIIB 56 (100.0) 3.58 (1.95-6.56, p<0.001) 0.27 (0.03-2.60, p=0.258)
HormoneRecep ER-/PR- 303 (100.0) - -
ER-/PR+ 53 (100.0) 0.97 (0.45-2.05, p=0.926) 1.00 (0.46-2.16, p=0.997)
ER+/PR- 201 (100.0) 0.81 (0.50-1.33, p=0.403) 0.85 (0.51-1.42, p=0.530)
ER+/PR+ 980 (100.0) 0.51 (0.35-0.74, p<0.001) 0.62 (0.41-0.95, p=0.027)
TimeDiagRand Mean (SD) 2.0 (1.0) 0.69 (0.59-0.82, p<0.001) 0.66 (0.55-0.78, p<0.001)
TumorDiff Grade Not Applicable or Not Available 127 (100.0) - -
Grade II, Moderately Differentiated 620 (100.0) 1.49 (0.74-3.01, p=0.263) 1.52 (0.75-3.10, p=0.250)
Grade I, Well Differentiated 239 (100.0) 0.84 (0.36-1.96, p=0.680) 1.01 (0.43-2.38, p=0.983)
Grade III, Poorly Differentiated 551 (100.0) 2.26 (1.13-4.51, p=0.021) 1.87 (0.91-3.83, p=0.086)
PosNodes 0 879 (100.0) - -
1~3 437 (100.0) 1.78 (1.21-2.60, p=0.003) 1.96 (1.03-3.73, p=0.041)
4~6 116 (100.0) 2.94 (1.79-4.85, p<0.001) 8.74 (1.01-75.85, p=0.049)
>6 105 (100.0) 4.50 (2.87-7.07, p<0.001) 13.48 (1.52-119.35, p=0.019)
TumorSize 0~2 753 (100.0) - -
2~3 421 (100.0) 2.61 (1.76-3.87, p<0.001) 2.39 (1.43-3.98, p=0.001)
3~4 175 (100.0) 1.82 (1.05-3.15, p=0.033) 1.31 (0.65-2.66, p=0.448)
4~5 79 (100.0) 2.99 (1.61-5.56, p=0.001) 1.92 (0.88-4.15, p=0.099)
>5 109 (100.0) 4.07 (2.45-6.75, p<0.001) 3.15 (1.51-6.58, p=0.002)
PhysicalAct <210 443 (100.0) - -
>1290 351 (100.0) 0.61 (0.38-0.99, p=0.047) 0.68 (0.42-1.10, p=0.115)
211~615 368 (100.0) 1.08 (0.72-1.61, p=0.717) 1.14 (0.76-1.71, p=0.530)
616~1290 375 (100.0) 0.84 (0.55-1.29, p=0.429) 0.91 (0.59-1.40, p=0.655)
KCal <1430 605 (100.0) - -
1430~1680 388 (100.0) 0.73 (0.48-1.12, p=0.148) 0.78 (0.51-1.19, p=0.250)
>1980 212 (100.0) 1.34 (0.88-2.06, p=0.177) 1.47 (0.95-2.27, p=0.086)
1681~1980 332 (100.0) 0.76 (0.49-1.18, p=0.216) 0.87 (0.56-1.37, p=0.557)
# Cox model for the comparison group
ImputedData %>%
  filter(Group=="Comparison") %>%
  finalfit(dependent_os, explanatory) -> TabComparison
knitr::kable(TabIntervention, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))
Dependent: Surv(SurvTime, Status) all HR (univariable) HR (multivariable)
AgeIdx <55 908 (100.0) - -
>=55 629 (100.0) 1.16 (0.85-1.59, p=0.348) 1.65 (1.19-2.30, p=0.003)
CancerStage Stage IIA 515 (100.0) - -
Stage I 584 (100.0) 0.69 (0.43-1.09, p=0.110) 1.27 (0.67-2.39, p=0.467)
Stage IIB 194 (100.0) 2.13 (1.34-3.39, p=0.001) 1.18 (0.62-2.24, p=0.608)
Stage IIIA 188 (100.0) 2.51 (1.60-3.93, p<0.001) 0.29 (0.04-2.35, p=0.245)
Stage IIIB 56 (100.0) 3.58 (1.95-6.56, p<0.001) 0.27 (0.03-2.60, p=0.258)
HormoneRecep ER-/PR- 303 (100.0) - -
ER-/PR+ 53 (100.0) 0.97 (0.45-2.05, p=0.926) 1.00 (0.46-2.16, p=0.997)
ER+/PR- 201 (100.0) 0.81 (0.50-1.33, p=0.403) 0.85 (0.51-1.42, p=0.530)
ER+/PR+ 980 (100.0) 0.51 (0.35-0.74, p<0.001) 0.62 (0.41-0.95, p=0.027)
TimeDiagRand Mean (SD) 2.0 (1.0) 0.69 (0.59-0.82, p<0.001) 0.66 (0.55-0.78, p<0.001)
TumorDiff Grade Not Applicable or Not Available 127 (100.0) - -
Grade II, Moderately Differentiated 620 (100.0) 1.49 (0.74-3.01, p=0.263) 1.52 (0.75-3.10, p=0.250)
Grade I, Well Differentiated 239 (100.0) 0.84 (0.36-1.96, p=0.680) 1.01 (0.43-2.38, p=0.983)
Grade III, Poorly Differentiated 551 (100.0) 2.26 (1.13-4.51, p=0.021) 1.87 (0.91-3.83, p=0.086)
PosNodes 0 879 (100.0) - -
1~3 437 (100.0) 1.78 (1.21-2.60, p=0.003) 1.96 (1.03-3.73, p=0.041)
4~6 116 (100.0) 2.94 (1.79-4.85, p<0.001) 8.74 (1.01-75.85, p=0.049)
>6 105 (100.0) 4.50 (2.87-7.07, p<0.001) 13.48 (1.52-119.35, p=0.019)
TumorSize 0~2 753 (100.0) - -
2~3 421 (100.0) 2.61 (1.76-3.87, p<0.001) 2.39 (1.43-3.98, p=0.001)
3~4 175 (100.0) 1.82 (1.05-3.15, p=0.033) 1.31 (0.65-2.66, p=0.448)
4~5 79 (100.0) 2.99 (1.61-5.56, p=0.001) 1.92 (0.88-4.15, p=0.099)
>5 109 (100.0) 4.07 (2.45-6.75, p<0.001) 3.15 (1.51-6.58, p=0.002)
PhysicalAct <210 443 (100.0) - -
>1290 351 (100.0) 0.61 (0.38-0.99, p=0.047) 0.68 (0.42-1.10, p=0.115)
211~615 368 (100.0) 1.08 (0.72-1.61, p=0.717) 1.14 (0.76-1.71, p=0.530)
616~1290 375 (100.0) 0.84 (0.55-1.29, p=0.429) 0.91 (0.59-1.40, p=0.655)
KCal <1430 605 (100.0) - -
1430~1680 388 (100.0) 0.73 (0.48-1.12, p=0.148) 0.78 (0.51-1.19, p=0.250)
>1980 212 (100.0) 1.34 (0.88-2.06, p=0.177) 1.47 (0.95-2.27, p=0.086)
1681~1980 332 (100.0) 0.76 (0.49-1.18, p=0.216) 0.87 (0.56-1.37, p=0.557)

4 Model Diagnosis

(a) Intervention Group

# Intervention
IdxInt = ImputedData$Group=="Intervention"
IntMod = coxph(Surv(SurvTime, Status) ~ ., data=ImputedData[IdxInt,])

# Martingale Residual
MartResid = residuals(IntMod, type="martingale")
ggplot(data = ImputedData[IdxInt,], mapping = aes(x = CancerStage, y = MartResid)) +
    geom_violin() +
    geom_point() +
    labs(title = "Martingale Residual Plot for Cancer Stage") +
    theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")

ggplot(data = ImputedData[IdxInt,], mapping = aes(x = TimeDiagRand, y = MartResid)) +
    geom_point() +
    geom_smooth() +
    labs(title = "Time from Diagnosis to Randomization") +
    theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")

ggplot(data = ImputedData[IdxInt,], mapping = aes(x = TumorDiff, y = MartResid)) +
    geom_point() +
    geom_violin() +
    labs(title = "Tumor Differentiation") +
    theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=60,hjust=1)) +
    ylab("Martingale Residual")

ggplot(data = ImputedData[IdxInt,], mapping = aes(x = KCal, y = MartResid)) +
    geom_point() +
    geom_violin() +
    labs(title = "KCal") +
    theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=0,hjust=1)) +
    ylab("Martingale Residual")

ggplot(data = ImputedData[IdxInt,], mapping = aes(x = factor(TumorSize), y = MartResid)) +
    geom_violin() +
    geom_point() +
    labs(title = "Tumor Size") +
    theme_bw() + theme(legend.key = element_blank()) + 
    ylab("Martingale Residual") + xlab("Tumor Size")

# Schoenfeld Individual Test
test.ph = cox.zph(IntMod)
ggcoxzph(test.ph)

# Cox-Snell Residual
Event = ImputedData$Status[IdxInt]
CSResid = -(MartResid - Event)
fit_coxsnell <- coxph(formula = Surv(CSResid, Event) ~ 1,
                      ties    = c("efron","breslow","exact")[1])

## Nelson-Aalen estimator for baseline hazard (all covariates zero)
df_base_haz <- basehaz(fit_coxsnell, centered = FALSE)
## Plot
ggplot(data = df_base_haz, mapping = aes(x = time, y = hazard)) +
    geom_point() +
    labs(x = "Cox-Snell residuals as pseudo observed times",
         y = "Estimated cumulative hazard at pseudo observed times") +
    theme_bw() + theme(legend.key = element_blank()) +
    geom_abline(intercept = 0, color="blue", cex=1, alpha=0.5)

(b) Comparison Group

# Comparison
IdxCom = ImputedData$Group=="Comparison"
ComMod = coxph(Surv(SurvTime, Status) ~ ., data=ImputedData[IdxCom,],  method = 'breslow')

# Martingale Residual
MartResid = residuals(ComMod, type="martingale")
ggplot(data = ImputedData[IdxCom,], mapping = aes(x = CancerStage, y = MartResid)) +
    geom_violin() +
    geom_point() +
    labs(title = "Martingale Residual Plot for Cancer Stage") +
    theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")

ggplot(data = ImputedData[IdxCom,], mapping = aes(x = TimeDiagRand, y = MartResid)) +
    geom_point() +
    geom_smooth() +
    labs(title = "Time from Diagnosis to Randomization") +
    theme_bw() + theme(legend.key = element_blank()) + ylab("Martingale Residual")

ggplot(data = ImputedData[IdxCom,], mapping = aes(x = TumorDiff, y = MartResid)) +
    geom_point() +
    geom_violin() +
    labs(title = "Tumor Differentiation") +
    theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=60,hjust=1)) +
    ylab("Martingale Residual")

ggplot(data = ImputedData[IdxCom,], mapping = aes(x = KCal, y = MartResid)) +
    geom_point() +
    geom_violin() +
    labs(title = "KCal") +
    theme_bw() + theme(legend.key = element_blank(), axis.text.x=element_text(angle=0,hjust=1)) +
    ylab("Martingale Residual")

ggplot(data = ImputedData[IdxCom,], mapping = aes(x = factor(TumorSize), y = MartResid)) +
    geom_violin() +
    geom_point() +
    labs(title = "Tumor Size") +
    theme_bw() + theme(legend.key = element_blank()) + 
    ylab("Martingale Residual") + xlab("Tumor Size")

# Schoenfeld Individual Test
test.ph = cox.zph(ComMod)
ggcoxzph(test.ph)

# Cox-Snell Residual
Event = ImputedData$Status[IdxCom]
CSResid = -(MartResid - Event)
fit_coxsnell <- coxph(formula = Surv(CSResid, Event) ~ 1,
                      ties    = c("efron","breslow","exact")[1])

## Nelson-Aalen estimator for baseline hazard (all covariates zero)
df_base_haz <- basehaz(fit_coxsnell, centered = FALSE)
## Plot
ggplot(data = df_base_haz, mapping = aes(x = time, y = hazard)) +
    geom_point() +
    labs(x = "Cox-Snell residuals as pseudo observed times",
         y = "Estimated cumulative hazard at pseudo observed times") +
    theme_bw() + theme(legend.key = element_blank()) +
    geom_abline(intercept = 0, color="blue", cex=1, alpha=0.5) +
  labs(title = "Quality of overall fit: Cox-Snell")

5 Cox Regression for each variable

explanatory = c("AgeIdx", "CancerStage", "HormoneRecep", 
        "TumorDiff", "PosNodes", "TumorSize", "PhysicalAct", "KCal")
for (item in explanatory) {
  len = ImputedData %>%
    select(item) %>%
    unique(.) %>%
    nrow(.)
  category = ImputedData %>%
      select(item) %>%
      unique(.)
  hi = item
  for (i in 1:len) {
    cat = category[i,]
    print(paste0("Explanatory: ", hi))
    print(paste0("Category: ", cat))
    idx = ImputedData[, hi] == cat
    print(summary(coxph(Surv(SurvTime, Status) ~ Group, data=ImputedData[idx, ])))
  }
}
## [1] "Explanatory: AgeIdx"
## [1] "Category: <55"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1825, number of events= 170 
## 
##                       coef exp(coef) se(coef)    z Pr(>|z|)
## GroupIntervention 0.003098  1.003103 0.153395 0.02    0.984
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.003     0.9969    0.7426     1.355
## 
## Concordance= 0.496  (se = 0.02 )
## Likelihood ratio test= 0  on 1 df,   p=1
## Wald test            = 0  on 1 df,   p=1
## Score (logrank) test = 0  on 1 df,   p=1
## 
## [1] "Explanatory: AgeIdx"
## [1] "Category: >=55"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1263, number of events= 146 
## 
##                       coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.04775   0.95338  0.16559 -0.288    0.773
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.9534      1.049    0.6891     1.319
## 
## Concordance= 0.5  (se = 0.022 )
## Likelihood ratio test= 0.08  on 1 df,   p=0.8
## Wald test            = 0.08  on 1 df,   p=0.8
## Score (logrank) test = 0.08  on 1 df,   p=0.8
## 
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIA"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1026, number of events= 91 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.2017    0.8174   0.2107 -0.957    0.339
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.8174      1.223    0.5408     1.235
## 
## Concordance= 0.527  (se = 0.027 )
## Likelihood ratio test= 0.92  on 1 df,   p=0.3
## Wald test            = 0.92  on 1 df,   p=0.3
## Score (logrank) test = 0.92  on 1 df,   p=0.3
## 
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage I"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1191, number of events= 65 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.07616   1.07914  0.24811 0.307    0.759
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.079     0.9267    0.6636     1.755
## 
## Concordance= 0.516  (se = 0.033 )
## Likelihood ratio test= 0.09  on 1 df,   p=0.8
## Wald test            = 0.09  on 1 df,   p=0.8
## Score (logrank) test = 0.09  on 1 df,   p=0.8
## 
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIB"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 384, number of events= 52 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.4093    1.5058   0.2857 1.433    0.152
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.506     0.6641    0.8601     2.636
## 
## Concordance= 0.553  (se = 0.036 )
## Likelihood ratio test= 2.1  on 1 df,   p=0.1
## Wald test            = 2.05  on 1 df,   p=0.2
## Score (logrank) test = 2.08  on 1 df,   p=0.1
## 
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIIA"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 373, number of events= 70 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.03257   1.03311  0.23917 0.136    0.892
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.033      0.968    0.6465     1.651
## 
## Concordance= 0.505  (se = 0.031 )
## Likelihood ratio test= 0.02  on 1 df,   p=0.9
## Wald test            = 0.02  on 1 df,   p=0.9
## Score (logrank) test = 0.02  on 1 df,   p=0.9
## 
## [1] "Explanatory: CancerStage"
## [1] "Category: Stage IIIB"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 114, number of events= 38 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)  
## GroupIntervention -0.5660    0.5678   0.3371 -1.679   0.0932 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.5678      1.761    0.2933     1.099
## 
## Concordance= 0.567  (se = 0.041 )
## Likelihood ratio test= 2.93  on 1 df,   p=0.09
## Wald test            = 2.82  on 1 df,   p=0.09
## Score (logrank) test = 2.89  on 1 df,   p=0.09
## 
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER+/PR+"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1949, number of events= 164 
## 
##                       coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.09479   0.90956  0.15629 -0.607    0.544
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.9096      1.099    0.6696     1.236
## 
## Concordance= 0.51  (se = 0.02 )
## Likelihood ratio test= 0.37  on 1 df,   p=0.5
## Wald test            = 0.37  on 1 df,   p=0.5
## Score (logrank) test = 0.37  on 1 df,   p=0.5
## 
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER-/PR+"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 133, number of events= 18 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.2632    1.3011   0.4753 0.554     0.58
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.301     0.7686    0.5125     3.303
## 
## Concordance= 0.541  (se = 0.062 )
## Likelihood ratio test= 0.3  on 1 df,   p=0.6
## Wald test            = 0.31  on 1 df,   p=0.6
## Score (logrank) test = 0.31  on 1 df,   p=0.6
## 
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER-/PR-"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 632, number of events= 89 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.08126   1.08465  0.21203 0.383    0.702
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.085      0.922    0.7158     1.644
## 
## Concordance= 0.511  (se = 0.027 )
## Likelihood ratio test= 0.15  on 1 df,   p=0.7
## Wald test            = 0.15  on 1 df,   p=0.7
## Score (logrank) test = 0.15  on 1 df,   p=0.7
## 
## [1] "Explanatory: HormoneRecep"
## [1] "Category: ER+/PR-"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 374, number of events= 45 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.03224   1.03276  0.30016 0.107    0.914
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.033     0.9683    0.5735      1.86
## 
## Concordance= 0.493  (se = 0.039 )
## Likelihood ratio test= 0.01  on 1 df,   p=0.9
## Wald test            = 0.01  on 1 df,   p=0.9
## Score (logrank) test = 0.01  on 1 df,   p=0.9
## 
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade Not Applicable or Not Available"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 256, number of events= 24 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention -0.4682    0.6261   0.4219 -1.11    0.267
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.6261      1.597    0.2739     1.431
## 
## Concordance= 0.554  (se = 0.051 )
## Likelihood ratio test= 1.27  on 1 df,   p=0.3
## Wald test            = 1.23  on 1 df,   p=0.3
## Score (logrank) test = 1.25  on 1 df,   p=0.3
## 
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade II, Moderately Differentiated"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1240, number of events= 123 
## 
##                       coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention -0.09382   0.91045  0.18049 -0.52    0.603
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.9104      1.098    0.6392     1.297
## 
## Concordance= 0.509  (se = 0.024 )
## Likelihood ratio test= 0.27  on 1 df,   p=0.6
## Wald test            = 0.27  on 1 df,   p=0.6
## Score (logrank) test = 0.27  on 1 df,   p=0.6
## 
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade I, Well Differentiated"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 484, number of events= 20 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.5902    1.8044   0.4691 1.258    0.208
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.804     0.5542    0.7196     4.525
## 
## Concordance= 0.538  (se = 0.06 )
## Likelihood ratio test= 1.66  on 1 df,   p=0.2
## Wald test            = 1.58  on 1 df,   p=0.2
## Score (logrank) test = 1.63  on 1 df,   p=0.2
## 
## [1] "Explanatory: TumorDiff"
## [1] "Category: Grade III, Poorly Differentiated"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1108, number of events= 149 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.03348   1.03405  0.16386 0.204    0.838
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.034     0.9671      0.75     1.426
## 
## Concordance= 0.507  (se = 0.021 )
## Likelihood ratio test= 0.04  on 1 df,   p=0.8
## Wald test            = 0.04  on 1 df,   p=0.8
## Score (logrank) test = 0.04  on 1 df,   p=0.8
## 
## [1] "Explanatory: PosNodes"
## [1] "Category: 1~3"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 885, number of events= 88 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.2416    1.2733   0.2147 1.125     0.26
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.273     0.7854     0.836     1.939
## 
## Concordance= 0.531  (se = 0.028 )
## Likelihood ratio test= 1.28  on 1 df,   p=0.3
## Wald test            = 1.27  on 1 df,   p=0.3
## Score (logrank) test = 1.27  on 1 df,   p=0.3
## 
## [1] "Explanatory: PosNodes"
## [1] "Category: 0"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1775, number of events= 125 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.1203    0.8867   0.1794 -0.671    0.503
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.8867      1.128    0.6239      1.26
## 
## Concordance= 0.515  (se = 0.023 )
## Likelihood ratio test= 0.45  on 1 df,   p=0.5
## Wald test            = 0.45  on 1 df,   p=0.5
## Score (logrank) test = 0.45  on 1 df,   p=0.5
## 
## [1] "Explanatory: PosNodes"
## [1] "Category: 4~6"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 231, number of events= 40 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.1119    1.1184   0.3167 0.353    0.724
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.118     0.8941    0.6013     2.081
## 
## Concordance= 0.521  (se = 0.041 )
## Likelihood ratio test= 0.13  on 1 df,   p=0.7
## Wald test            = 0.12  on 1 df,   p=0.7
## Score (logrank) test = 0.13  on 1 df,   p=0.7
## 
## [1] "Explanatory: PosNodes"
## [1] "Category: >6"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 197, number of events= 63 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)  
## GroupIntervention -0.4325    0.6489   0.2541 -1.702   0.0888 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.6489      1.541    0.3943     1.068
## 
## Concordance= 0.556  (se = 0.032 )
## Likelihood ratio test= 2.92  on 1 df,   p=0.09
## Wald test            = 2.9  on 1 df,   p=0.09
## Score (logrank) test = 2.94  on 1 df,   p=0.09
## 
## [1] "Explanatory: TumorSize"
## [1] "Category: 0~2"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1523, number of events= 94 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.1567    0.8549   0.2070 -0.757    0.449
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.8549       1.17    0.5698     1.283
## 
## Concordance= 0.509  (se = 0.027 )
## Likelihood ratio test= 0.57  on 1 df,   p=0.4
## Wald test            = 0.57  on 1 df,   p=0.4
## Score (logrank) test = 0.57  on 1 df,   p=0.4
## 
## [1] "Explanatory: TumorSize"
## [1] "Category: 2~3"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 863, number of events= 109 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.2506    1.2848   0.1923 1.303    0.193
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.285     0.7783    0.8814     1.873
## 
## Concordance= 0.53  (se = 0.025 )
## Likelihood ratio test= 1.71  on 1 df,   p=0.2
## Wald test            = 1.7  on 1 df,   p=0.2
## Score (logrank) test = 1.71  on 1 df,   p=0.2
## 
## [1] "Explanatory: TumorSize"
## [1] "Category: 3~4"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 335, number of events= 46 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)  
## GroupIntervention -0.5830    0.5582   0.3022 -1.929   0.0537 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.5582      1.791    0.3087     1.009
## 
## Concordance= 0.574  (se = 0.037 )
## Likelihood ratio test= 3.83  on 1 df,   p=0.05
## Wald test            = 3.72  on 1 df,   p=0.05
## Score (logrank) test = 3.83  on 1 df,   p=0.05
## 
## [1] "Explanatory: TumorSize"
## [1] "Category: 4~5"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 151, number of events= 25 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.1039    0.9013   0.4005 -0.259    0.795
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.9013       1.11    0.4111     1.976
## 
## Concordance= 0.532  (se = 0.051 )
## Likelihood ratio test= 0.07  on 1 df,   p=0.8
## Wald test            = 0.07  on 1 df,   p=0.8
## Score (logrank) test = 0.07  on 1 df,   p=0.8
## 
## [1] "Explanatory: TumorSize"
## [1] "Category: >5"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 216, number of events= 42 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.1843    1.2023   0.3101 0.594    0.552
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.202     0.8317    0.6547     2.208
## 
## Concordance= 0.517  (se = 0.04 )
## Likelihood ratio test= 0.35  on 1 df,   p=0.6
## Wald test            = 0.35  on 1 df,   p=0.6
## Score (logrank) test = 0.35  on 1 df,   p=0.6
## 
## [1] "Explanatory: PhysicalAct"
## [1] "Category: <210"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 850, number of events= 103 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.1491    0.8615   0.1972 -0.756     0.45
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.8615      1.161    0.5853     1.268
## 
## Concordance= 0.516  (se = 0.026 )
## Likelihood ratio test= 0.57  on 1 df,   p=0.4
## Wald test            = 0.57  on 1 df,   p=0.4
## Score (logrank) test = 0.57  on 1 df,   p=0.4
## 
## [1] "Explanatory: PhysicalAct"
## [1] "Category: >1290"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 738, number of events= 52 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.01448   1.01458  0.27767 0.052    0.958
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.015     0.9856    0.5888     1.748
## 
## Concordance= 0.512  (se = 0.036 )
## Likelihood ratio test= 0  on 1 df,   p=1
## Wald test            = 0  on 1 df,   p=1
## Score (logrank) test = 0  on 1 df,   p=1
## 
## [1] "Explanatory: PhysicalAct"
## [1] "Category: 616~1290"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 749, number of events= 71 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.0336    1.0342   0.2374 0.141    0.887
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.034      0.967    0.6493     1.647
## 
## Concordance= 0.491  (se = 0.031 )
## Likelihood ratio test= 0.02  on 1 df,   p=0.9
## Wald test            = 0.02  on 1 df,   p=0.9
## Score (logrank) test = 0.02  on 1 df,   p=0.9
## 
## [1] "Explanatory: PhysicalAct"
## [1] "Category: 211~615"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 751, number of events= 90 
## 
##                      coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.03249   1.03303  0.21086 0.154    0.878
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.033      0.968    0.6833     1.562
## 
## Concordance= 0.506  (se = 0.027 )
## Likelihood ratio test= 0.02  on 1 df,   p=0.9
## Wald test            = 0.02  on 1 df,   p=0.9
## Score (logrank) test = 0.02  on 1 df,   p=0.9
## 
## [1] "Explanatory: KCal"
## [1] "Category: <1430"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 1186, number of events= 124 
## 
##                      coef exp(coef) se(coef)    z Pr(>|z|)
## GroupIntervention 0.07737   1.08044  0.17985 0.43    0.667
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention      1.08     0.9255    0.7595     1.537
## 
## Concordance= 0.513  (se = 0.023 )
## Likelihood ratio test= 0.19  on 1 df,   p=0.7
## Wald test            = 0.19  on 1 df,   p=0.7
## Score (logrank) test = 0.19  on 1 df,   p=0.7
## 
## [1] "Explanatory: KCal"
## [1] "Category: 1681~1980"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 673, number of events= 65 
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.2500    0.7788   0.2505 -0.998    0.318
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.7788      1.284    0.4766     1.272
## 
## Concordance= 0.529  (se = 0.032 )
## Likelihood ratio test= 1.01  on 1 df,   p=0.3
## Wald test            = 1  on 1 df,   p=0.3
## Score (logrank) test = 1  on 1 df,   p=0.3
## 
## [1] "Explanatory: KCal"
## [1] "Category: 1430~1680"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 776, number of events= 64 
## 
##                       coef exp(coef) se(coef)      z Pr(>|z|)
## GroupIntervention -0.02964   0.97080  0.25006 -0.119    0.906
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention    0.9708       1.03    0.5947     1.585
## 
## Concordance= 0.509  (se = 0.033 )
## Likelihood ratio test= 0.01  on 1 df,   p=0.9
## Wald test            = 0.01  on 1 df,   p=0.9
## Score (logrank) test = 0.01  on 1 df,   p=0.9
## 
## [1] "Explanatory: KCal"
## [1] "Category: >1980"
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group, data = ImputedData[idx, 
##     ])
## 
##   n= 453, number of events= 63 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)
## GroupIntervention 0.0601    1.0619   0.2521 0.238    0.812
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## GroupIntervention     1.062     0.9417    0.6479     1.741
## 
## Concordance= 0.508  (se = 0.033 )
## Likelihood ratio test= 0.06  on 1 df,   p=0.8
## Wald test            = 0.06  on 1 df,   p=0.8
## Score (logrank) test = 0.06  on 1 df,   p=0.8

6 Variable Selection

library(glmnet)
library(ncvreg)

Forward and Backward Selection

fit.single <- coxph(Surv(SurvTime, Status) ~ Group, data = ImputedData)
fit.forward <- MASS::stepAIC(fit.single, 
                              scope=list(lower=~Group,
                                         upper=~Group + AgeIdx + CancerStage +
                                           HormoneRecep + TimeDiagRand + TumorDiff +
                                           PosNodes + TumorSize + PhysicalAct + KCal),
                              direction = "forward", trace = TRUE)
## Start:  AIC=4909.31
## Surv(SurvTime, Status) ~ Group
## 
##                Df    AIC
## + CancerStage   4 4802.6
## + PosNodes      3 4812.2
## + TumorSize     4 4853.1
## + TimeDiagRand  1 4863.4
## + TumorDiff     3 4877.2
## + HormoneRecep  3 4894.8
## + PhysicalAct   3 4900.7
## + KCal          3 4904.8
## + AgeIdx        1 4908.9
## <none>            4909.3
## 
## Step:  AIC=4802.57
## Surv(SurvTime, Status) ~ Group + CancerStage
## 
##                Df    AIC
## + TimeDiagRand  1 4752.0
## + TumorDiff     3 4787.2
## + HormoneRecep  3 4792.7
## + PhysicalAct   3 4796.7
## + PosNodes      3 4797.9
## + KCal          3 4798.6
## + AgeIdx        1 4799.6
## + TumorSize     4 4802.0
## <none>            4802.6
## 
## Step:  AIC=4751.96
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand
## 
##                Df    AIC
## + TumorDiff     3 4738.4
## + HormoneRecep  3 4743.4
## + AgeIdx        1 4744.2
## + PosNodes      3 4744.8
## + PhysicalAct   3 4746.7
## + KCal          3 4749.6
## + TumorSize     4 4750.5
## <none>            4752.0
## 
## Step:  AIC=4738.36
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand + 
##     TumorDiff
## 
##                Df    AIC
## + AgeIdx        1 4725.9
## + PosNodes      3 4732.2
## + PhysicalAct   3 4734.6
## + KCal          3 4735.8
## + HormoneRecep  3 4736.6
## <none>            4738.4
## + TumorSize     4 4738.6
## 
## Step:  AIC=4725.93
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand + 
##     TumorDiff + AgeIdx
## 
##                Df    AIC
## + PosNodes      3 4720.6
## + KCal          3 4720.7
## + PhysicalAct   3 4721.6
## + HormoneRecep  3 4724.3
## + TumorSize     4 4725.3
## <none>            4725.9
## 
## Step:  AIC=4720.61
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand + 
##     TumorDiff + AgeIdx + PosNodes
## 
##                Df    AIC
## + KCal          3 4715.9
## + PhysicalAct   3 4717.0
## + TumorSize     4 4719.9
## + HormoneRecep  3 4720.0
## <none>            4720.6
## 
## Step:  AIC=4715.94
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand + 
##     TumorDiff + AgeIdx + PosNodes + KCal
## 
##                Df    AIC
## + PhysicalAct   3 4713.0
## + HormoneRecep  3 4715.0
## + TumorSize     4 4715.5
## <none>            4715.9
## 
## Step:  AIC=4713.02
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand + 
##     TumorDiff + AgeIdx + PosNodes + KCal + PhysicalAct
## 
##                Df    AIC
## + HormoneRecep  3 4712.5
## + TumorSize     4 4712.6
## <none>            4713.0
## 
## Step:  AIC=4712.5
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand + 
##     TumorDiff + AgeIdx + PosNodes + KCal + PhysicalAct + HormoneRecep
## 
##             Df    AIC
## + TumorSize  4 4712.1
## <none>         4712.5
## 
## Step:  AIC=4712.14
## Surv(SurvTime, Status) ~ Group + CancerStage + TimeDiagRand + 
##     TumorDiff + AgeIdx + PosNodes + KCal + PhysicalAct + HormoneRecep + 
##     TumorSize
summary(fit.forward)
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group + CancerStage + 
##     TimeDiagRand + TumorDiff + AgeIdx + PosNodes + KCal + PhysicalAct + 
##     HormoneRecep + TumorSize, data = ImputedData)
## 
##   n= 3088, number of events= 316 
## 
##                                                  coef exp(coef) se(coef)      z
## GroupIntervention                            -0.02834   0.97205  0.11359 -0.250
## CancerStageStage I                           -0.21157   0.80932  0.22585 -0.937
## CancerStageStage IIB                          0.21518   1.24008  0.23371  0.921
## CancerStageStage IIIA                        -0.26009   0.77098  0.52111 -0.499
## CancerStageStage IIIB                        -0.07770   0.92524  0.59532 -0.131
## TimeDiagRand                                 -0.45728   0.63300  0.06310 -7.247
## TumorDiffGrade II, Moderately Differentiated  0.18902   1.20807  0.22685  0.833
## TumorDiffGrade I, Well Differentiated        -0.56250   0.56978  0.30692 -1.833
## TumorDiffGrade III, Poorly Differentiated     0.28546   1.33037  0.22868  1.248
## AgeIdx>=55                                    0.49611   1.64232  0.11778  4.212
## PosNodes1~3                                   0.13751   1.14742  0.22773  0.604
## PosNodes4~6                                   0.98205   2.66994  0.55494  1.770
## PosNodes>6                                    1.48566   4.41790  0.56637  2.623
## KCal1430~1680                                -0.23925   0.78722  0.15554 -1.538
## KCal>1980                                     0.32973   1.39059  0.15688  2.102
## KCal1681~1980                                -0.05403   0.94740  0.15542 -0.348
## PhysicalAct>1290                             -0.41813   0.65828  0.17184 -2.433
## PhysicalAct211~615                            0.03480   1.03541  0.14627  0.238
## PhysicalAct616~1290                          -0.15756   0.85423  0.15626 -1.008
## HormoneRecepER-/PR+                           0.12710   1.13554  0.26199  0.485
## HormoneRecepER+/PR-                          -0.07141   0.93108  0.18983 -0.376
## HormoneRecepER+/PR+                          -0.30550   0.73675  0.14544 -2.101
## TumorSize2~3                                  0.50181   1.65170  0.18125  2.769
## TumorSize3~4                                  0.35273   1.42294  0.23475  1.503
## TumorSize4~5                                  0.37824   1.45971  0.27172  1.392
## TumorSize>5                                   0.57557   1.77814  0.26378  2.182
##                                              Pr(>|z|)    
## GroupIntervention                             0.80295    
## CancerStageStage I                            0.34889    
## CancerStageStage IIB                          0.35720    
## CancerStageStage IIIA                         0.61771    
## CancerStageStage IIIB                         0.89615    
## TimeDiagRand                                 4.27e-13 ***
## TumorDiffGrade II, Moderately Differentiated  0.40471    
## TumorDiffGrade I, Well Differentiated         0.06684 .  
## TumorDiffGrade III, Poorly Differentiated     0.21193    
## AgeIdx>=55                                   2.53e-05 ***
## PosNodes1~3                                   0.54594    
## PosNodes4~6                                   0.07678 .  
## PosNodes>6                                    0.00871 ** 
## KCal1430~1680                                 0.12400    
## KCal>1980                                     0.03557 *  
## KCal1681~1980                                 0.72810    
## PhysicalAct>1290                              0.01496 *  
## PhysicalAct211~615                            0.81196    
## PhysicalAct616~1290                           0.31330    
## HormoneRecepER-/PR+                           0.62758    
## HormoneRecepER+/PR-                           0.70679    
## HormoneRecepER+/PR+                           0.03568 *  
## TumorSize2~3                                  0.00563 ** 
## TumorSize3~4                                  0.13295    
## TumorSize4~5                                  0.16391    
## TumorSize>5                                   0.02911 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                              exp(coef) exp(-coef) lower .95
## GroupIntervention                               0.9721     1.0287    0.7780
## CancerStageStage I                              0.8093     1.2356    0.5198
## CancerStageStage IIB                            1.2401     0.8064    0.7844
## CancerStageStage IIIA                           0.7710     1.2970    0.2776
## CancerStageStage IIIB                           0.9252     1.0808    0.2881
## TimeDiagRand                                    0.6330     1.5798    0.5594
## TumorDiffGrade II, Moderately Differentiated    1.2081     0.8278    0.7745
## TumorDiffGrade I, Well Differentiated           0.5698     1.7551    0.3122
## TumorDiffGrade III, Poorly Differentiated       1.3304     0.7517    0.8498
## AgeIdx>=55                                      1.6423     0.6089    1.3038
## PosNodes1~3                                     1.1474     0.8715    0.7343
## PosNodes4~6                                     2.6699     0.3745    0.8998
## PosNodes>6                                      4.4179     0.2264    1.4559
## KCal1430~1680                                   0.7872     1.2703    0.5804
## KCal>1980                                       1.3906     0.7191    1.0225
## KCal1681~1980                                   0.9474     1.0555    0.6986
## PhysicalAct>1290                                0.6583     1.5191    0.4700
## PhysicalAct211~615                              1.0354     0.9658    0.7773
## PhysicalAct616~1290                             0.8542     1.1706    0.6289
## HormoneRecepER-/PR+                             1.1355     0.8806    0.6795
## HormoneRecepER+/PR-                             0.9311     1.0740    0.6418
## HormoneRecepER+/PR+                             0.7368     1.3573    0.5540
## TumorSize2~3                                    1.6517     0.6054    1.1579
## TumorSize3~4                                    1.4229     0.7028    0.8982
## TumorSize4~5                                    1.4597     0.6851    0.8570
## TumorSize>5                                     1.7781     0.5624    1.0603
##                                              upper .95
## GroupIntervention                               1.2144
## CancerStageStage I                              1.2600
## CancerStageStage IIB                            1.9606
## CancerStageStage IIIA                           2.1410
## CancerStageStage IIIB                           2.9716
## TimeDiagRand                                    0.7163
## TumorDiffGrade II, Moderately Differentiated    1.8845
## TumorDiffGrade I, Well Differentiated           1.0398
## TumorDiffGrade III, Poorly Differentiated       2.0827
## AgeIdx>=55                                      2.0688
## PosNodes1~3                                     1.7929
## PosNodes4~6                                     7.9226
## PosNodes>6                                     13.4064
## KCal1430~1680                                   1.0678
## KCal>1980                                       1.8912
## KCal1681~1980                                   1.2848
## PhysicalAct>1290                                0.9219
## PhysicalAct211~615                              1.3792
## PhysicalAct616~1290                             1.1603
## HormoneRecepER-/PR+                             1.8976
## HormoneRecepER+/PR-                             1.3507
## HormoneRecepER+/PR+                             0.9798
## TumorSize2~3                                    2.3562
## TumorSize3~4                                    2.2543
## TumorSize4~5                                    2.4863
## TumorSize>5                                     2.9819
## 
## Concordance= 0.739  (se = 0.014 )
## Likelihood ratio test= 247.2  on 26 df,   p=<2e-16
## Wald test            = 248.9  on 26 df,   p=<2e-16
## Score (logrank) test = 285.9  on 26 df,   p=<2e-16
fit.all <- coxph(Surv(SurvTime, Status) ~ Group + AgeIdx + CancerStage + HormoneRecep + TimeDiagRand + TumorDiff + PosNodes + TumorSize + PhysicalAct + KCal, data = ImputedData)
fit.backward <- MASS::stepAIC(fit.all, 
                              scope=list(lower=~Group,
                                         upper=~Group + AgeIdx + CancerStage +
                                           HormoneRecep + TimeDiagRand + TumorDiff +
                                           PosNodes + TumorSize + PhysicalAct + KCal),
                              direction = "backward", trace = TRUE)
## Start:  AIC=4712.14
## Surv(SurvTime, Status) ~ Group + AgeIdx + CancerStage + HormoneRecep + 
##     TimeDiagRand + TumorDiff + PosNodes + TumorSize + PhysicalAct + 
##     KCal
## 
##                Df    AIC
## - CancerStage   4 4707.0
## <none>            4712.1
## - TumorSize     4 4712.5
## - HormoneRecep  3 4712.6
## - PhysicalAct   3 4714.6
## - PosNodes      3 4715.5
## - KCal          3 4716.2
## - TumorDiff     3 4720.7
## - AgeIdx        1 4727.6
## - TimeDiagRand  1 4766.6
## 
## Step:  AIC=4706.99
## Surv(SurvTime, Status) ~ Group + AgeIdx + HormoneRecep + TimeDiagRand + 
##     TumorDiff + PosNodes + TumorSize + PhysicalAct + KCal
## 
##                Df    AIC
## <none>            4707.0
## - HormoneRecep  3 4707.6
## - PhysicalAct   3 4709.5
## - KCal          3 4711.3
## - TumorDiff     3 4716.3
## - TumorSize     4 4722.3
## - AgeIdx        1 4723.0
## - TimeDiagRand  1 4760.8
## - PosNodes      3 4765.4
summary(fit.backward)
## Call:
## coxph(formula = Surv(SurvTime, Status) ~ Group + AgeIdx + HormoneRecep + 
##     TimeDiagRand + TumorDiff + PosNodes + TumorSize + PhysicalAct + 
##     KCal, data = ImputedData)
## 
##   n= 3088, number of events= 316 
## 
##                                                  coef exp(coef) se(coef)      z
## GroupIntervention                            -0.02594   0.97440  0.11346 -0.229
## AgeIdx>=55                                    0.50451   1.65617  0.11777  4.284
## HormoneRecepER-/PR+                           0.13253   1.14171  0.26193  0.506
## HormoneRecepER+/PR-                          -0.06921   0.93314  0.18956 -0.365
## HormoneRecepER+/PR+                          -0.30789   0.73499  0.14549 -2.116
## TimeDiagRand                                 -0.45330   0.63553  0.06294 -7.202
## TumorDiffGrade II, Moderately Differentiated  0.17279   1.18861  0.22590  0.765
## TumorDiffGrade I, Well Differentiated        -0.58955   0.55458  0.30558 -1.929
## TumorDiffGrade III, Poorly Differentiated     0.28144   1.32504  0.22811  1.234
## PosNodes1~3                                   0.32105   1.37857  0.14062  2.283
## PosNodes4~6                                   0.79311   2.21027  0.18762  4.227
## PosNodes>6                                    1.39033   4.01616  0.16788  8.282
## TumorSize2~3                                  0.62380   1.86601  0.14388  4.336
## TumorSize3~4                                  0.51329   1.67078  0.18624  2.756
## TumorSize4~5                                  0.52591   1.69201  0.23480  2.240
## TumorSize>5                                   0.70224   2.01827  0.20374  3.447
## PhysicalAct>1290                             -0.41795   0.65840  0.17169 -2.434
## PhysicalAct211~615                            0.03889   1.03965  0.14621  0.266
## PhysicalAct616~1290                          -0.15212   0.85888  0.15601 -0.975
## KCal1430~1680                                -0.23418   0.79122  0.15535 -1.507
## KCal>1980                                     0.34328   1.40956  0.15669  2.191
## KCal1681~1980                                -0.03749   0.96321  0.15513 -0.242
##                                              Pr(>|z|)    
## GroupIntervention                            0.819190    
## AgeIdx>=55                                   1.84e-05 ***
## HormoneRecepER-/PR+                          0.612885    
## HormoneRecepER+/PR-                          0.715049    
## HormoneRecepER+/PR+                          0.034319 *  
## TimeDiagRand                                 5.93e-13 ***
## TumorDiffGrade II, Moderately Differentiated 0.444347    
## TumorDiffGrade I, Well Differentiated        0.053695 .  
## TumorDiffGrade III, Poorly Differentiated    0.217272    
## PosNodes1~3                                  0.022420 *  
## PosNodes4~6                                  2.37e-05 ***
## PosNodes>6                                    < 2e-16 ***
## TumorSize2~3                                 1.45e-05 ***
## TumorSize3~4                                 0.005849 ** 
## TumorSize4~5                                 0.025098 *  
## TumorSize>5                                  0.000567 ***
## PhysicalAct>1290                             0.014919 *  
## PhysicalAct211~615                           0.790264    
## PhysicalAct616~1290                          0.329523    
## KCal1430~1680                                0.131708    
## KCal>1980                                    0.028465 *  
## KCal1681~1980                                0.809055    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                              exp(coef) exp(-coef) lower .95
## GroupIntervention                               0.9744     1.0263    0.7801
## AgeIdx>=55                                      1.6562     0.6038    1.3148
## HormoneRecepER-/PR+                             1.1417     0.8759    0.6833
## HormoneRecepER+/PR-                             0.9331     1.0717    0.6436
## HormoneRecepER+/PR+                             0.7350     1.3606    0.5526
## TimeDiagRand                                    0.6355     1.5735    0.5618
## TumorDiffGrade II, Moderately Differentiated    1.1886     0.8413    0.7634
## TumorDiffGrade I, Well Differentiated           0.5546     1.8032    0.3047
## TumorDiffGrade III, Poorly Differentiated       1.3250     0.7547    0.8473
## PosNodes1~3                                     1.3786     0.7254    1.0465
## PosNodes4~6                                     2.2103     0.4524    1.5302
## PosNodes>6                                      4.0162     0.2490    2.8901
## TumorSize2~3                                    1.8660     0.5359    1.4075
## TumorSize3~4                                    1.6708     0.5985    1.1598
## TumorSize4~5                                    1.6920     0.5910    1.0679
## TumorSize>5                                     2.0183     0.4955    1.3538
## PhysicalAct>1290                                0.6584     1.5188    0.4703
## PhysicalAct211~615                              1.0397     0.9619    0.7806
## PhysicalAct616~1290                             0.8589     1.1643    0.6326
## KCal1430~1680                                   0.7912     1.2639    0.5835
## KCal>1980                                       1.4096     0.7094    1.0368
## KCal1681~1980                                   0.9632     1.0382    0.7107
##                                              upper .95
## GroupIntervention                               1.2171
## AgeIdx>=55                                      2.0862
## HormoneRecepER-/PR+                             1.9077
## HormoneRecepER+/PR-                             1.3530
## HormoneRecepER+/PR+                             0.9775
## TimeDiagRand                                    0.7190
## TumorDiffGrade II, Moderately Differentiated    1.8507
## TumorDiffGrade I, Well Differentiated           1.0094
## TumorDiffGrade III, Poorly Differentiated       2.0720
## PosNodes1~3                                     1.8160
## PosNodes4~6                                     3.1926
## PosNodes>6                                      5.5810
## TumorSize2~3                                    2.4739
## TumorSize3~4                                    2.4068
## TumorSize4~5                                    2.6808
## TumorSize>5                                     3.0089
## PhysicalAct>1290                                0.9218
## PhysicalAct211~615                              1.3846
## PhysicalAct616~1290                             1.1661
## KCal1430~1680                                   1.0728
## KCal>1980                                       1.9163
## KCal1681~1980                                   1.3055
## 
## Concordance= 0.739  (se = 0.014 )
## Likelihood ratio test= 244.4  on 22 df,   p=<2e-16
## Wald test            = 247.4  on 22 df,   p=<2e-16
## Score (logrank) test = 281.8  on 22 df,   p=<2e-16

LASSO

num_col = dim(ImputedData[,-c(1,3)])[2]
fit.lasso   <- glmnet(ImputedData[,-c(1,3)], Surv(ImputedData$SurvTime, ImputedData$Status), alpha = 1, family = 'cox', penalty.factor=c(0, rep(1, num_col-1)), lambda=seq(0, 0.1, 0.01))
  
coef(fit.lasso)
## 10 x 11 sparse Matrix of class "dgCMatrix"
##                                                                         
## Group        -0.02009475 -0.02009475 -0.02009475 -0.02009475 -0.02009475
## AgeIdx        .           .           .           .           .         
## CancerStage   .           .           .           .           .         
## HormoneRecep  .           .           .           .           .         
## TimeDiagRand  .           .           .           .           .         
## TumorDiff     .           .           .           .           .         
## PosNodes      .           .           .           .           .         
## TumorSize     .           .           .           .           .         
## PhysicalAct   .           .           .           .           .         
## KCal          .           .           .           .           .         
##                                                                         
## Group        -0.02329210 -0.02860239 -0.03427818 -0.03989189 -0.03822995
## AgeIdx        .           .           .           .           0.11822610
## CancerStage   .           .           .           .           .         
## HormoneRecep  .           .           .          -0.01633862 -0.09691284
## TimeDiagRand  .           .          -0.05145592 -0.16626020 -0.29239236
## TumorDiff     .           .           .           .           .         
## PosNodes      0.08127026  0.19338498  0.28934444  0.35492725  0.41572189
## TumorSize     .           .           0.01051445  0.06735136  0.11928396
## PhysicalAct   .           .           .           .           .         
## KCal          .           .           .           .           .         
##                         
## Group        -0.02749967
## AgeIdx        0.43835174
## CancerStage   0.01341706
## HormoneRecep -0.16126146
## TimeDiagRand -0.43551918
## TumorDiff     0.04968574
## PosNodes      0.46079892
## TumorSize     0.17546330
## PhysicalAct  -0.02794548
## KCal          0.04402324
plot(fit.lasso)

SCAD

fit.scad = ncvsurv(ImputedData[,-c(1,3)], Surv(ImputedData$SurvTime, ImputedData$Status), penalty = 'SCAD')
plot(fit.scad)

7 Competing Risk Models

(Table 3) Study Events

# Confirmed breast cancer event
a1 = endpoint$`Intervention Group`
b1 = endpoint$`Breast Cancer Status as of 6/1/2006 or last prior contact`
table(a1, b1)
##               b1
## a1             Confirmed – Distant Recurrence\t Confirmed – Local Recurrence
##   Comparison                                189                           28
##   Intervention                              168                           35
##               b1
## a1             Confirmed – New Primary Breast Cancer
##   Comparison                                      35
##   Intervention                                    43
##               b1
## a1             Confirmed – Regional Recurrence No Evidence of Recurrence
##   Comparison                                10                      1289
##   Intervention                              10                      1281
# Confirmed deaths
b2 = endpoint$`Breast Cancer Contribute to Death`
table(a1, b2)
##               b2
## a1             Dead from a cause other than Breast Cancer
##   Comparison                                           25
##   Intervention                                         28
##               b2
## a1             Dead from Breast Cancer
##   Comparison                       135
##   Intervention                     126
##               b2
## a1             Dead from Cancer, not confirmed breast but likely so Not Dead
##   Comparison                                                      0     1391
##   Intervention                                                    1     1382

Metaheuristics with Applications to Cox’s Regression and Variable Selection